Package org.terrier.structures.indexing.singlepass

Source Code of org.terrier.structures.indexing.singlepass.BlockFieldPostingInRun

/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk/
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is BlockFieldPostingInRun.java.
*
* The Original Code is Copyright (C) 2004-2011 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
*   Roi Blanco (rblanc{at}@udc.es)
*   Craig Macdonald (craigm{at}dcs.gla.ac.uk)
*/
package org.terrier.structures.indexing.singlepass;

import java.io.IOException;
import java.util.Arrays;

import org.terrier.compression.BitOut;
import org.terrier.structures.FieldLexiconEntry;
import org.terrier.structures.LexiconEntry;
import org.terrier.structures.postings.BlockFieldPostingImpl;
import org.terrier.structures.postings.IterablePosting;
import org.terrier.structures.postings.WritablePosting;

/** Class holding the information for a posting list read
* from a run previously written to disk. Used in the merging phase of the single-pass inversion method.
* This class knows how to append itself to a {@link org.terrier.compression.BitOut}, and it
* represents a posting with block and field information <code>(tf, df, [docid, idf, fieldScore, blockFr [blockid]])</code>
* @author Roi Blanco
*
*/
public class BlockFieldPostingInRun extends BlockPostingInRun{
  /** The number of different fields that are used for indexing field information.*/ 
  protected final int fieldTags;
 
  protected final int[] fieldTFs;
 
  /**
   * Constructor for the class.
   */
  public BlockFieldPostingInRun(int _fieldTags) {
    super();
    fieldTags = _fieldTags;
    fieldTFs = new int[fieldTags];
  }

  /**
   * Writes the document data of this posting to a {@link org.terrier.compression.BitOut}
   * It encodes the data with the right compression methods.
   * The stream is written as <code>d1, idf(d1), fields, blockNo(d1), bid1, bid2, ...,  d2 - d1, idf(d2), fields, blockNo(d2), ...</code> etc
   * @param bos BitOut to be written.
   * @param last int representing the last document written in this posting.
   * @param runShift amount of delta to apply to the first posting read.
   * @return The last posting written.
   */ 
  public int append(BitOut bos, int last, int runShiftthrows IOException{
    int current = runShift - 1;
    for(int i = 0; i < termDf; i++){
      int docid = postingSource.readGamma() + current;
      bos.writeGamma(docid - last);
      bos.writeUnary(postingSource.readGamma());
      current = last = docid;
      //deal with fields
      for(int f=0;f<fieldTags;f++)
      {
        int tff = postingSource.readUnary() -1;
        fieldTFs[f] += tff;
        bos.writeUnary(tff +1);
      }
      //now deal with blocks
      final int numOfBlocks = postingSource.readUnary() -1;
      bos.writeUnary(numOfBlocks+1);
      if (numOfBlocks > 0)
        for(int j = 0; j < numOfBlocks; j++){
          /* we're reading and saving gaps here, not blockids */
          bos.writeGamma(postingSource.readGamma());
        }
    }
    try{
      postingSource.align();
    }catch(Exception e){
      // last posting
    }
    return last;
  }
 
  @Override
  public LexiconEntry getLexiconEntry() {
    FieldLexiconEntry fes = new FieldLexiconEntry(fieldTFs.length);
    fes.setStatistics(termDf, termTF);
    fes.setFieldFrequencies(fieldTFs);
    return fes;
  }
 
  @Override
  public void addToLexiconEntry(LexiconEntry _le)
  {
    super.addToLexiconEntry(_le);
    FieldLexiconEntry le = (FieldLexiconEntry)_le;
    int[] tffs = le.getFieldFrequencies();
    addTo(tffs, fieldTFs);
  }
 
  protected static void addTo(int[] target, int[] additions)
  {
    for(int i=0;i<target.length;i++)
    {
      target[i] += additions[i];
    }
  }
 
  protected class bfPIRPostingIterator extends PIRPostingIterator implements
    org.terrier.structures.postings.FieldPosting,
    org.terrier.structures.postings.BlockPosting
  {
    protected int[] fieldFrequencies = new int[fieldTags];
    protected int blockFreq;
    protected int[] blockIds;
   
    public bfPIRPostingIterator(int runShift) {
      super(runShift);
    }

    public int[] getFieldFrequencies() {
      return fieldFrequencies;
    }

    public int[] getFieldLengths() {
      return null;
    }

    @Override
    protected void readPostingNotDocid() throws IOException {
      super.readPostingNotDocid();
      for(int fi = 0; fi < fieldTags;fi++)
      {
        fieldFrequencies[fi] = postingSource.readUnary() -1;
      }
      blockIds = new int[postingSource.readUnary() -1];
      blockIds[0] = postingSource.readGamma()-1;
      for(int i=1;i<blockFreq;i++)
        blockIds[i] = postingSource.readGamma() - blockIds[i-1];
    }

    public int[] getPositions() {
      return blockIds;
    }
   
    public WritablePosting asWritablePosting() {
      BlockFieldPostingImpl bp = new BlockFieldPostingImpl(docid, frequency, blockIds, fieldFrequencies.length);
      System.arraycopy(fieldFrequencies, 0, bp.getFieldFrequencies(), 0, fieldFrequencies.length);
      return bp;
    }
       
  }
 
  @Override
  public void setTerm(String term) {
    super.setTerm(term);
    Arrays.fill(fieldTFs, 0);
  }
 
  @Override
  public IterablePosting getPostingIterator(final int runShift) throws IOException
  {
    return new bfPIRPostingIterator(runShift);
  }
}
TOP

Related Classes of org.terrier.structures.indexing.singlepass.BlockFieldPostingInRun

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.